# EDA_Wankee

# 1. Import Packages ----
# `sf` is listed explicitly because it supplies st_read() and st_transform(),
# which this script calls when it loads the subzone boundaries.
# NOTE(review): `st` looks like a typo for `sf`; confirm nothing else needs it
# and then drop it from the list.
pacman::p_load(tidyverse, readr, psych, st, sf, stars, tmap, neaSG)
# Collect the full path of every .csv file under data/, including sub-folders.
weather_list <- list.files(
  "data",
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)

# Print the discovered file paths.
weather_list
[1] "data/aspatial/Admiralty.csv"
[2] "data/aspatial/Ang Mo Kio.csv"
[3] "data/aspatial/Boon Lay (East).csv"
[4] "data/aspatial/Changi.csv"
[5] "data/aspatial/Choa Chu Kang (South).csv"
[6] "data/aspatial/Clementi.csv"
[7] "data/aspatial/East Coast Parkway.csv"
[8] "data/aspatial/Jurong (West).csv"
[9] "data/aspatial/Khatib.csv"
[10] "data/aspatial/Marina Barrage.csv"
[11] "data/aspatial/Newton.csv"
[12] "data/aspatial/Pasir Panjang.csv"
[13] "data/aspatial/Paya Lebar.csv"
[14] "data/aspatial/Seletar.csv"
[15] "data/aspatial/Sembawang.csv"
[16] "data/aspatial/Tai Seng.csv"
[17] "data/aspatial/Tengah.csv"
[18] "data/aspatial/Tuas South.csv"
# Read the station CSVs listed in weather_list into one tibble, then replace
# the spaces in every column name with underscores so the columns can be
# referenced without backticks.
weather <- weather_list %>%
  read_csv() %>%
  rename_with(function(col_name) gsub(" ", "_", col_name))

# Inspect the column types of the combined table.
glimpse(weather)
#> Rows: 168,836
Columns: 13
$ Station <chr> "Admiralty", "Admiralty", "Admiralty", "Admir…
$ Year <dbl> 2009, 2009, 2009, 2009, 2009, 2009, 2009, 200…
$ Month <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ Day <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14…
$ Daily_Rainfall_Total <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Highest_30_Min_Rainfall <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Highest_60_Min_Rainfall <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Highest_120_Min_Rainfall <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Mean_Temperature <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Maximum_Temperature <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Minimum_Temperature <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Mean_Wind_Speed <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Max_Wind_Speed <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Summary statistics for every column. Rows flagged with `*` in the output
# belong to columns that were not numeric when this ran, so their statistics
# describe converted category codes rather than the measurements themselves.
psych::describe(weather)
#> vars n mean sd median trimmed mad min
Station* 1 168836 10.51 5.02 12 10.69 5.93 1
Year 2 168836 2006.25 12.34 2010 2007.10 13.34 1980
Month 3 168836 6.54 3.45 7 6.55 4.45 1
Day 4 168836 15.73 8.80 16 15.72 11.86 1
Daily_Rainfall_Total* 5 167188 210.70 322.45 4 145.27 2.97 1
Highest_30_Min_Rainfall* 6 51716 70.59 123.67 3 40.26 1.48 1
Highest_60_Min_Rainfall* 7 51683 80.76 140.96 3 46.24 1.48 1
Highest_120_Min_Rainfall* 8 51686 91.24 153.54 3 54.96 1.48 1
Mean_Temperature* 9 109321 56.07 12.31 56 56.29 13.34 1
Maximum_Temperature* 10 128383 89.37 17.25 92 90.62 14.83 1
Minimum_Temperature* 11 128336 54.13 14.39 53 53.54 14.83 1
Mean_Wind_Speed* 12 128355 186.14 87.96 229 196.52 34.10 1
Max_Wind_Speed* 13 93405 208.43 75.11 202 206.25 75.61 1
max range skew kurtosis se
Station* 18 17 -0.24 -1.27 0.01
Year 2023 43 -0.50 -0.99 0.03
Month 12 11 -0.01 -1.21 0.01
Day 31 30 0.01 -1.19 0.02
Daily_Rainfall_Total* 1283 1282 1.37 0.72 0.79
Highest_30_Min_Rainfall* 435 434 1.77 1.81 0.54
Highest_60_Min_Rainfall* 555 554 1.88 2.49 0.62
Highest_120_Min_Rainfall* 622 621 1.78 2.15 0.68
Mean_Temperature* 96 95 -0.22 0.03 0.04
Maximum_Temperature* 143 142 -0.83 1.29 0.05
Minimum_Temperature* 105 104 0.34 -0.18 0.04
Mean_Wind_Speed* 271 270 -0.98 -0.80 0.25
Max_Wind_Speed* 524 523 0.31 0.09 0.25
# Save rds ----
# Cache the combined weather table so later runs can skip the CSV import.
write_rds(weather, "data/weather.rds")

# Load rds ----
weather <- readRDS("data/weather.rds")

# Read the MPSZ-2019 subzone boundary layer and reproject it from the WGS 84
# coordinates reported in the read log below to EPSG:3414.
mpsz <- st_transform(
  st_read(dsn = "data/geospatial", layer = "MPSZ-2019"),
  crs = 3414
)
#> Reading layer `MPSZ-2019' from data source
#> `/Users/chockwankee/Documents/chockwk/Group11_VAP/EDA/data/geospatial'
#> using driver `ESRI Shapefile'
#> Simple feature collection with 332 features and 6 fields
#> Geometry type: MULTIPOLYGON
#> Dimension: XY
#> Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
#> Geodetic CRS: WGS 84

# Inspect the attribute columns of the boundary layer.
glimpse(mpsz)
#> Rows: 332
Columns: 7
$ SUBZONE_N <chr> "MARINA EAST", "INSTITUTION HILL", "ROBERTSON QUAY", "JURON…
$ SUBZONE_C <chr> "MESZ01", "RVSZ05", "SRSZ01", "WISZ01", "MUSZ02", "MPSZ05",…
$ PLN_AREA_N <chr> "MARINA EAST", "RIVER VALLEY", "SINGAPORE RIVER", "WESTERN …
$ PLN_AREA_C <chr> "ME", "RV", "SR", "WI", "MU", "MP", "WI", "WI", "SI", "SI",…
$ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", "WEST…
$ REGION_C <chr> "CR", "CR", "CR", "WR", "CR", "CR", "WR", "WR", "CR", "CR",…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((33222.98 29..., MULTIPOLYGON (…
# Interactive boundary maps ----
tmap_mode("view")

# Ask tmap to check and repair invalid geometries. This is a session-wide
# option rather than a map layer, so it is set once here instead of being
# spliced into the `+` chain of a single map.
tmap_options(check.and.fix = TRUE)

#' Draw a boundary layer coloured by one of its attribute columns.
#'
#' @param boundaries An sf object to draw (here `mpsz`).
#' @param fill_col Name of the column used to colour the polygons.
#' @param title Map title.
#' @return A tmap object; printing it renders the map.
map_boundaries_by <- function(boundaries, fill_col, title) {
  tm_shape(boundaries) +
    tm_polygons(col = fill_col, palette = "Set2") +
    tm_layout(
      main.title = title,
      main.title.position = "left",
      main.title.size = 1,
      legend.show = FALSE,
      frame = FALSE
    ) +
    tm_view(set.zoom.limits = c(11, 12))
}

# Each title names the attribute that is actually mapped; previously all three
# maps were titled "Planning Area".
map_boundaries_by(mpsz, "REGION_N", "Region")
map_boundaries_by(mpsz, "PLN_AREA_C", "Planning Area")
map_boundaries_by(mpsz, "SUBZONE_N", "Subzone")

# Planning-area names present in the boundary layer. These are the values the
# later join on PLN_AREA_N has to match.
unique(mpsz$PLN_AREA_N)
#> [1] "MARINA EAST" "RIVER VALLEY"
[3] "SINGAPORE RIVER" "WESTERN ISLANDS"
[5] "MUSEUM" "MARINE PARADE"
[7] "SOUTHERN ISLANDS" "BUKIT MERAH"
[9] "DOWNTOWN CORE" "STRAITS VIEW"
[11] "QUEENSTOWN" "OUTRAM"
[13] "MARINA SOUTH" "ROCHOR"
[15] "KALLANG" "TANGLIN"
[17] "NEWTON" "CLEMENTI"
[19] "BEDOK" "PIONEER"
[21] "JURONG EAST" "ORCHARD"
[23] "GEYLANG" "BOON LAY"
[25] "BUKIT TIMAH" "NOVENA"
[27] "TOA PAYOH" "TUAS"
[29] "JURONG WEST" "SERANGOON"
[31] "BISHAN" "TAMPINES"
[33] "BUKIT BATOK" "HOUGANG"
[35] "CHANGI BAY" "PAYA LEBAR"
[37] "ANG MO KIO" "PASIR RIS"
[39] "BUKIT PANJANG" "TENGAH"
[41] "SELETAR" "SUNGEI KADUT"
[43] "YISHUN" "MANDAI"
[45] "PUNGGOL" "CHOA CHU KANG"
[47] "SENGKANG" "CHANGI"
[49] "CENTRAL WATER CATCHMENT" "SEMBAWANG"
[51] "WESTERN WATER CATCHMENT" "WOODLANDS"
[53] "NORTH-EASTERN ISLANDS" "SIMPANG"
[55] "LIM CHU KANG"
# Station names present in the weather data.
unique(weather$Station)
#> [1] "Admiralty" "Ang Mo Kio" "Boon Lay (East)"
#> [4] "Changi" "Choa Chu Kang (South)" "Clementi"
#> [7] "East Coast Parkway" "Jurong (West)" "Khatib"
#> [10] "Marina Barrage" "Newton" "Pasir Panjang"
#> [13] "Paya Lebar" "Seletar" "Sembawang"
#> [16] "Tai Seng" "Tengah" "Tuas South"

# Lookup from weather station name to the planning area (mpsz$PLN_AREA_N)
# the station is attributed to. The values must be PLN_AREA_N values, because
# that is the column the boundary layer is later joined on.
station_to_subzone <- c(
  "Admiralty" = "WOODLANDS",
  "Ang Mo Kio" = "ANG MO KIO",
  "Boon Lay (East)" = "BOON LAY",
  "Changi" = "CHANGI",
  "Choa Chu Kang (South)" = "CHOA CHU KANG",
  "Clementi" = "CLEMENTI",
  "East Coast Parkway" = "BEDOK",
  "Jurong (West)" = "JURONG WEST",
  "Khatib" = "YISHUN",
  "Marina Barrage" = "MARINA SOUTH",
  "Newton" = "NEWTON",
  # "PASIR PANJANG" is not among the PLN_AREA_N values printed above, so the
  # original mapping could never match a polygon.
  # NOTE(review): QUEENSTOWN is presumed to be the planning area containing
  # the Pasir Panjang subzones; confirm with
  # unique(mpsz$PLN_AREA_N[grepl("PASIR PANJANG", mpsz$SUBZONE_N)]).
  "Pasir Panjang" = "QUEENSTOWN",
  "Paya Lebar" = "PAYA LEBAR",
  "Seletar" = "SELETAR",
  "Sembawang" = "SEMBAWANG",
  "Tai Seng" = "HOUGANG",
  "Tengah" = "TENGAH",
  "Tuas South" = "TUAS"
)

# Surface lookup gaps now instead of letting them turn into silent NAs later.
unmapped_stations <- setdiff(unique(weather$Station), names(station_to_subzone))
if (length(unmapped_stations) > 0) {
  warning(
    "Stations without a planning area: ",
    paste(unmapped_stations, collapse = ", "),
    call. = FALSE
  )
}
unknown_areas <- setdiff(station_to_subzone, mpsz$PLN_AREA_N)
if (length(unknown_areas) > 0) {
  warning(
    "Lookup values not found in mpsz$PLN_AREA_N: ",
    paste(unknown_areas, collapse = ", "),
    call. = FALSE
  )
}

# Attach the planning area to every observation as the first column.
# unname() drops the station names that indexing a named vector carries over.
weather$Subzone <- unname(station_to_subzone[weather$Station])
weather <- weather %>%
  select(Subzone, everything())

head(weather)
#> # A tibble: 6 × 14
Subzone Station Year Month Day Daily_Rainfall_Total Highest_30_Min_Rainf…¹
<chr> <chr> <dbl> <dbl> <dbl> <chr> <chr>
1 WOODLAN… Admira… 2009 1 1 <NA> <NA>
2 WOODLAN… Admira… 2009 1 2 <NA> <NA>
3 WOODLAN… Admira… 2009 1 3 <NA> <NA>
4 WOODLAN… Admira… 2009 1 4 <NA> <NA>
5 WOODLAN… Admira… 2009 1 5 <NA> <NA>
6 WOODLAN… Admira… 2009 1 6 <NA> <NA>
# ℹ abbreviated name: ¹Highest_30_Min_Rainfall
# ℹ 7 more variables: Highest_60_Min_Rainfall <chr>,
# Highest_120_Min_Rainfall <chr>, Mean_Temperature <chr>,
# Maximum_Temperature <chr>, Minimum_Temperature <chr>,
# Mean_Wind_Speed <chr>, Max_Wind_Speed <chr>
# Convert measurements to numeric ----
# The CSV import left every measurement column as character (see the glimpse
# output of `weather`), so each column aggregated below must be converted
# first. Minimum_Temperature was missing from this list in the original, which
# made summarise() abort with "invalid 'type' (character) of argument" at
# sum(Minimum_Temperature, na.rm = TRUE) and left weather_map undefined.
# Entries that are not valid numbers become NA; as.numeric() warns about this.
# The two wind-speed columns are not aggregated here and are left unchanged.
aggregated_cols <- c(
  "Daily_Rainfall_Total",
  "Highest_30_Min_Rainfall",
  "Highest_60_Min_Rainfall",
  "Highest_120_Min_Rainfall",
  "Mean_Temperature",
  "Maximum_Temperature",
  "Minimum_Temperature"
)

weather <- weather %>%
  mutate(across(all_of(aggregated_cols), as.numeric))

#' Aggregate a vector while ignoring NA, but keep "no data" as NA.
#'
#' A plain `sum(x, na.rm = TRUE)` reports 0 for a year with no observations,
#' and `max()`/`min()` report -Inf/Inf; this returns NA in that case instead.
#'
#' @param x Numeric vector of daily values for one station-year.
#' @param fn Aggregating function that accepts `na.rm`, e.g. `sum` or `max`.
#' @return A single numeric value, or `NA_real_` if every element is NA.
aggregate_observed <- function(x, fn) {
  if (all(is.na(x))) {
    return(NA_real_)
  }
  fn(x, na.rm = TRUE)
}

# Annual summary per station ----
# Each column uses the statistic its name promises: rainfall is totalled, the
# "highest" and "maximum" columns take the yearly maximum, the mean temperature
# is averaged and the minimum temperature takes the yearly minimum. The
# original summed all of them.
weather_map <- weather %>%
  group_by(Subzone, Station, Year) %>%
  summarise(
    Annual_Rainfall_Total =
      aggregate_observed(Daily_Rainfall_Total, sum),
    Annual_Highest_30_Min_Rainfall =
      aggregate_observed(Highest_30_Min_Rainfall, max),
    Annual_Highest_60_Min_Rainfall =
      aggregate_observed(Highest_60_Min_Rainfall, max),
    Annual_Highest_120_Min_Rainfall =
      aggregate_observed(Highest_120_Min_Rainfall, max),
    Annual_Mean_Temperature =
      aggregate_observed(Mean_Temperature, mean),
    Annual_Maximum_Temperature =
      aggregate_observed(Maximum_Temperature, max),
    Annual_Minimum_Temperature =
      aggregate_observed(Minimum_Temperature, min),
    .groups = "drop"
  )
# Annual weather choropleths ----
# NOTE: in the captured render every statement in this section failed with
# "object 'weather_map' not found" or "object 'mpszweather' not found",
# because the summarise() step that builds weather_map had errored.
glimpse(weather_map)

# weather_map is built per Subzone, Station and Year, so joining it as-is
# repeats every polygon once per year and stacks the copies on the map.
# Keep a single year before joining; change map_year to map a different one.
map_year <- max(weather_map$Year, na.rm = TRUE)
weather_map_year <- weather_map %>%
  filter(Year == map_year)

# Attach the annual figures to each subzone polygon through its planning area.
mpszweather <- left_join(
  mpsz,
  weather_map_year,
  by = c("PLN_AREA_N" = "Subzone")
)
glimpse(mpszweather)

#' Draw a choropleth of one annual weather variable.
#'
#' @param data An sf object holding the polygons and the variable to map.
#' @param value_col Name of the column to colour by.
#' @param palette Name of the colour palette.
#' @return A tmap object; printing it renders the map.
map_annual_value <- function(data, value_col, palette) {
  tm_shape(data) +
    tm_polygons(col = value_col, palette = palette, style = "jenks")
}

# Rainfall maps.
map_annual_value(mpszweather, "Annual_Rainfall_Total", "Blues")
map_annual_value(mpszweather, "Annual_Highest_30_Min_Rainfall", "Blues")

# Temperature maps.
map_annual_value(mpszweather, "Annual_Mean_Temperature", "Oranges")
map_annual_value(mpszweather, "Annual_Maximum_Temperature", "Oranges")
map_annual_value(mpszweather, "Annual_Minimum_Temperature", "Oranges")